<!DOCTYPE HTML>
<html lang="zh-CN">


<head>
    <meta charset="utf-8">
    <meta name="keywords" content="scikit-learn系列四：逻辑回归, 欢迎来到，TWOTO 的博客">
    <meta name="description" content="技术、效率、摄影">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <!-- Pinch-zoom is intentionally left enabled so low-vision users can scale the page. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="renderer" content="webkit|ie-stand|ie-comp">
    <meta name="mobile-web-app-capable" content="yes">
    <meta name="format-detection" content="telephone=no">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
    <!-- Global site tag (gtag.js) - Google Analytics -->


    <title>scikit-learn系列四：逻辑回归 | 欢迎来到，TWOTO 的博客</title>
    <link rel="icon" type="image/png" href="/twoto.png">

    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/awesome/css/all.css">
    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/materialize/materialize.min.css">
    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/aos/aos.css">
    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/animate/animate.min.css">
    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/lightGallery/css/lightgallery.min.css">
    <link rel="stylesheet" type="text/css" href="/css/matery.css">
    <link rel="stylesheet" type="text/css" href="/css/my.css">

    <script src="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/jquery/jquery.min.js"></script>

<style>
    /* Full-screen overlay displayed on top of the page while it loads. */
    #loading-container {
        position: fixed;
        top: 0;
        left: 0;
        min-height: 100vh;
        width: 100vw;
        z-index: 9999;
        display: flex;
        flex-direction: column;
        justify-content: center;
        align-items: center;
        background: #FFF;
        text-align: center;
        /* Fade the overlay out through opacity instead of hiding it abruptly. */
        -webkit-transition: opacity 1s ease;
        -moz-transition: opacity 1s ease;
        -o-transition: opacity 1s ease;
        transition: opacity 1s ease;
    }

    /* Box that holds the Pac-Man figure and the balls it "eats". */
    .loading-image {
        width: 120px;
        height: 50px;
        transform: translate(-50%);
    }

    /* Pac-Man: the first two children are stacked discs with a transparent
       wedge, rotated in opposite directions to open and close the mouth. */
    .loading-image div:first-of-type {
        width: 0;
        height: 0;
        border: 25px solid #49b1f5;
        border-right-color: transparent;
        border-radius: 25px;
        -webkit-animation: rotate_pacman_half_up .5s 0s infinite;
        animation: rotate_pacman_half_up .5s 0s infinite;
    }

    /* A pacman-balls animation previously declared for this child was always
       overridden by the animation below, so that dead rule has been dropped. */
    .loading-image div:nth-child(2) {
        width: 0;
        height: 0;
        border: 25px solid #49b1f5;
        border-right-color: transparent;
        border-radius: 25px;
        -webkit-animation: rotate_pacman_half_down .5s 0s infinite;
        animation: rotate_pacman_half_down .5s 0s infinite;
        /* Pull the second disc up so it overlaps the first one exactly. */
        margin-top: -50px;
    }

    /* Balls: remaining children, all starting at the right edge of the box.
       Only the effective 10px size is kept; an earlier 15px width/height pair
       in the same rule was overridden and has been removed. */
    .loading-image div:nth-child(3),
    .loading-image div:nth-child(4),
    .loading-image div:nth-child(5),
    .loading-image div:nth-child(6) {
        background-color: #49b1f5;
        border-radius: 100%;
        margin: 2px;
        width: 10px;
        height: 10px;
        position: absolute;
        transform: translateY(-6.25px);
        top: 25px;
        left: 100px;
    }

    /* Stagger the balls so they travel towards Pac-Man one after another. */
    .loading-image div:nth-child(3) {
        -webkit-animation: pacman-balls 1s linear .33s infinite;
        animation: pacman-balls 1s linear .33s infinite;
    }

    .loading-image div:nth-child(4) {
        -webkit-animation: pacman-balls 1s linear .66s infinite;
        animation: pacman-balls 1s linear .66s infinite;
    }

    .loading-image div:nth-child(5) {
        -webkit-animation: pacman-balls 1s linear .99s infinite;
        animation: pacman-balls 1s linear .99s infinite;
    }

    @-webkit-keyframes rotate_pacman_half_up {0% {transform: rotate(270deg)}50% {transform: rotate(1turn)}to {transform: rotate(270deg)}}

    @keyframes rotate_pacman_half_up {0% {transform: rotate(270deg)}50% {transform: rotate(1turn)}to {transform: rotate(270deg)}}

    @-webkit-keyframes rotate_pacman_half_down {0% {transform: rotate(90deg)}50% {transform: rotate(0deg)}to {transform: rotate(90deg)}}

    @keyframes rotate_pacman_half_down {0% {transform: rotate(90deg)}50% {transform: rotate(0deg)}to {transform: rotate(90deg)}}

    @-webkit-keyframes pacman-balls {75% {opacity: .7}to {transform: translate(-100px, -6.25px)}}

    @keyframes pacman-balls {75% {opacity: .7}to {transform: translate(-100px, -6.25px)}}

    .loading-text {
        margin-bottom: 20vh;
        text-align: center;
        color: #2c3e50;
        font-size: 2rem;
        box-sizing: border-box;
        padding: 0 10px;
        text-shadow: 0 2px 10px rgba(0,0,0,0.2);
    }

    @media only screen and (max-width: 500px) {
        .loading-text {
            font-size: 1.5rem;
        }
    }

    /* Applied by the loader script; combined with the opacity transition on
       #loading-container this produces the fade-out. */
    .fadeout {
        opacity: 0;
        filter: alpha(opacity=0);
    }

    /* Logo entrance animation. The unprefixed keyframes now mirror the
       -webkit- version; previously they lacked the standard transform and
       the 100% frame. */
    @-webkit-keyframes fadeInDown{0%{opacity:0;-webkit-transform:translate3d(0,-100%,0);transform:translate3d(0,-100%,0)}100%{opacity:1;-webkit-transform:none;transform:none}}
    @keyframes fadeInDown{0%{opacity:0;-webkit-transform:translate3d(0,-100%,0);transform:translate3d(0,-100%,0)}100%{opacity:1;-webkit-transform:none;transform:none}}
 </style>
 <script>
// Fades out and then hides the #loading-container overlay.
// The overlay is kept on screen for at least MIN_VISIBLE_MS, then the
// "fadeout" class starts the opacity transition and, once FADE_MS has
// elapsed, the element is removed from layout with display:none.
(function () {
    const MIN_VISIBLE_MS = 1000; // minimum time the loading page stays visible
    const FADE_MS = 1000;        // matches the 1s opacity transition in the CSS

    const hideLoader = function () {
        const loader = document.getElementById("loading-container");
        // This script runs from <head>; if the overlay markup is absent
        // there is nothing to hide, so bail out instead of throwing.
        if (!loader) {
            return;
        }
        loader.className = "fadeout"; // start the fade-out transition
        setTimeout(function () {
            loader.style.display = "none";
        }, FADE_MS);
    };

    setTimeout(function () {
        if (document.readyState === "loading") {
            // The body may not be parsed yet on a slow connection; wait for
            // the DOM so the overlay element is guaranteed to be reachable.
            document.addEventListener("DOMContentLoaded", hideLoader);
        } else {
            hideLoader();
        }
    }, MIN_VISIBLE_MS);
})();
 </script><meta name="generator" content="Hexo 4.2.1"><link rel="alternate" href="/atom.xml" title="欢迎来到，TWOTO 的博客" type="application/atom+xml">
<link rel="stylesheet" href="/css/prism-tomorrow.css" type="text/css">
<link rel="stylesheet" href="/css/prism-line-numbers.css" type="text/css">
</head>



 <body>
 <!-- Loading overlay. It must live inside <body>; the five child divs of
      .loading-image are addressed by position (first-of-type / nth-child)
      in the loader CSS, so their count and order matter. -->
 <div id="loading-container">
     <p class="loading-text">玩命加载中 . . . </p>
     <div class="loading-image" aria-hidden="true">
         <div></div>
         <div></div>
         <div></div>
         <div></div>
         <div></div>
     </div>
 </div>
<!-- Animated line background (canvas-nest.js). The color, opacity, zIndex and
     count attributes are the library's configuration, read from this script
     tag. NOTE(review): the library appears to locate its own tag at load time,
     so do not add async or defer without confirming it still finds these values. -->
<script type="text/javascript"
color="122 103 238" opacity="0.5" zIndex="-1" count="200"
src="https://cdn.bootcss.com/canvas-nest.js/1.0.0/canvas-nest.min.js">
</script>

<header class="navbar-fixed">
    <nav id="headNav" class="bg-color nav-transparent">
        <div id="navContainer" class="nav-wrapper container">
            <div class="brand-logo">
                <a href="/" class="waves-effect waves-light">
                    
                    <img src="/medias/mylogo.png" class="logo-img" alt="LOGO">
                    
                    <span class="logo-span">欢迎来到，TWOTO 的博客</span>
                </a>
            </div>
            

<!-- Icon-only trigger for the mobile side navigation: aria-label supplies the accessible name. -->
<a href="#" data-target="mobile-nav" class="sidenav-trigger button-collapse" aria-label="打开导航菜单"><i class="fas fa-bars" aria-hidden="true"></i></a>
<ul class="right nav-menu">
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/" class="waves-effect waves-light">
      
      <i class="fas fa-home" style="zoom: 0.6;"></i>
      
      <span>首页</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/tags" class="waves-effect waves-light">
      
      <i class="fas fa-tags" style="zoom: 0.6;"></i>
      
      <span>标签</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/categories" class="waves-effect waves-light">
      
      <i class="fas fa-bookmark" style="zoom: 0.6;"></i>
      
      <span>分类</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/archives" class="waves-effect waves-light">
      
      <i class="fas fa-archive" style="zoom: 0.6;"></i>
      
      <span>归档</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/about" class="waves-effect waves-light">
      
      <i class="fas fa-user-circle" style="zoom: 0.6;"></i>
      
      <span>关于</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/contact" class="waves-effect waves-light">
      
      <i class="fas fa-comments" style="zoom: 0.6;"></i>
      
      <span>留言板</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/friends" class="waves-effect waves-light">
      
      <i class="fas fa-address-book" style="zoom: 0.6;"></i>
      
      <span>友情链接</span>
    </a>
    
  </li>
  
  <li>
    <!-- Icon-only search trigger: title alone is not a reliable accessible name, so aria-label is added. -->
    <a href="#searchModal" class="modal-trigger waves-effect waves-light" aria-label="搜索">
      <i id="searchIcon" class="fas fa-search" title="搜索" style="zoom: 0.85;" aria-hidden="true"></i>
    </a>
  </li>
</ul>


<div id="mobile-nav" class="side-nav sidenav">

    <div class="mobile-head bg-color">
        
        <!-- Decorative: the site name is rendered as text right below, so the alt is empty. -->
        <img src="/medias/mylogo.png" class="logo-img circle responsive-img" alt="">
        
        <div class="logo-name">欢迎来到，TWOTO 的博客</div>
        <div class="logo-desc">
            
            技术、效率、摄影
            
        </div>
    </div>

    

    <ul class="menu-list mobile-menu-list">
        
        <li class="m-nav-item">
	  
		<a href="/" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-home"></i>
			
			首页
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/tags" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-tags"></i>
			
			标签
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/categories" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-bookmark"></i>
			
			分类
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/archives" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-archive"></i>
			
			归档
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/about" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-user-circle"></i>
			
			关于
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/contact" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-comments"></i>
			
			留言板
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/friends" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-address-book"></i>
			
			友情链接
		</a>
          
        </li>
        
        
        <li><div class="divider"></div></li>
        <li>
            <!-- rel="noopener noreferrer" keeps the new tab from reaching back into this page via window.opener. -->
            <a href="https://github.com/DongZhouGu" class="waves-effect waves-light" target="_blank" rel="noopener noreferrer">
                <i class="fab fa-github-square fa-fw" aria-hidden="true"></i>Fork Me
            </a>
        </li>
        
    </ul>
</div>


        </div>

        
            <style>
    /* "Fork me on GitHub" corner ribbon, pinned to the top-right of the nav. */
    .github-corner {
        position: absolute;
        top: 0;
        right: 0;
        z-index: 10;
        border: 0;
        transform: scale(1.1);
    }

    /* The ribbon is suppressed while the nav bar carries .nav-transparent. */
    .nav-transparent .github-corner {
        display: none !important;
    }

    /* fill paints the triangle; color feeds the paths that use currentColor. */
    .github-corner svg {
        width: 64px;
        height: 64px;
        fill: #fff;
        color: #0f9d58;
    }

    /* The octocat arm is still by default and waves once on hover. */
    .github-corner .octo-arm {
        animation: none;
    }

    .github-corner:hover .octo-arm {
        animation: a 0.56s ease-in-out;
    }

    /* Arm wave: swings between -25deg and 10deg, returning to rest. */
    @keyframes a {
        0%, to { transform: rotate(0); }
        20%, 60% { transform: rotate(-25deg); }
        40%, 80% { transform: rotate(10deg); }
    }
</style>

<!-- Corner ribbon link. The SVG is hidden from assistive technology, so the
     link gets its accessible name from aria-label; rel="noopener noreferrer"
     protects this page from the tab opened via target="_blank". -->
<a href="https://github.com/DongZhouGu" class="github-corner tooltipped hide-on-med-and-down" target="_blank"
   rel="noopener noreferrer" aria-label="Fork Me"
   data-tooltip="Fork Me" data-position="left" data-delay="50">
    <svg viewBox="0 0 250 250" aria-hidden="true">
        <path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path>
        <path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2"
              fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path>
        <path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z"
              fill="currentColor" class="octo-body"></path>
    </svg>
</a>
        
    </nav>

</header>





<div class="bg-cover pd-header post-cover" style="background-image: url('https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/medias/featureimages/9.jpg')">
    <div class="container" style="right: 0px;left: 0px;">
        <div class="row">
            <div class="col s12 m12 l12">
                <div class="brand">
                    <h1 class="description center-align post-title">scikit-learn系列四：逻辑回归</h1>
                </div>
            </div>
        </div>
    </div>
</div>




<main class="post-container content">

    
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/tocbot/tocbot.css">
<style>
    #articleContent h1::before,
    #articleContent h2::before,
    #articleContent h3::before,
    #articleContent h4::before,
    #articleContent h5::before,
    #articleContent h6::before {
        display: block;
        content: " ";
        height: 100px;
        margin-top: -100px;
        visibility: hidden;
    }

    /* Suppress the focus ring only for pointer-initiated focus; keyboard
       users (:focus-visible) keep the browser's visible outline. */
    #articleContent :focus:not(:focus-visible) {
        outline: none;
    }

    .toc-fixed {
        position: fixed;
        top: 64px;
    }

    .toc-widget {
        width: 345px;
        padding-left: 20px;
    }

    .toc-widget .toc-title {
        margin: 35px 0 15px 0;
        padding-left: 17px;
        font-size: 1.5rem;
        font-weight: bold;
        line-height: 1.5rem;
    }

    .toc-widget ol {
        padding: 0;
        list-style: none;
    }

    #toc-content {
        height: calc(100vh - 250px);
        overflow: auto;
    }

    #toc-content ol {
        padding-left: 10px;
    }

    #toc-content ol li {
        padding-left: 10px;
    }

    #toc-content .toc-link:hover {
        color: #42b983;
        font-weight: 700;
        text-decoration: underline;
    }

    #toc-content .toc-link::before {
        background-color: transparent;
        max-height: 25px;

        position: absolute;
        right: 23.5vw;
        display: block;
    }

    #toc-content .is-active-link {
        color: #42b983;
    }

    #floating-toc-btn {
        position: fixed;
        right: 15px;
        bottom: 76px;
        padding-top: 15px;
        margin-bottom: 0;
        z-index: 998;
    }

    #floating-toc-btn .btn-floating {
        width: 48px;
        height: 48px;
    }

    #floating-toc-btn .btn-floating i {
        line-height: 48px;
        font-size: 1.4rem;
    }
</style>
<div class="row">
    <div id="main-content" class="col s12 m12 l9">
        <!-- 文章内容详情 -->
<div id="artDetail">
    <div class="card">
        <div class="card-content article-info">
            <div class="row tag-cate">
                <div class="col s7">
                    
                    <div class="article-tag">
                        
                            <a href="/tags/%E5%9F%BA%E7%A1%80%E7%9F%A5%E8%AF%86/">
                                <span class="chip bg-color">基础知识</span>
                            </a>
                        
                            <a href="/tags/ML%E7%AE%97%E6%B3%95/">
                                <span class="chip bg-color">ML算法</span>
                            </a>
                        
                    </div>
                    
                </div>
                <div class="col s5 right-align">
                    
                    <div class="post-cate">
                        <i class="fas fa-bookmark fa-fw icon-category"></i>
                        
                            <a href="/categories/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/" class="post-category">
                                机器学习
                            </a>
                        
                    </div>
                    
                </div>
            </div>

            <div class="post-info">
                
                <div class="post-date info-break-policy">
                    <i class="far fa-calendar-minus fa-fw"></i>发布日期:&nbsp;&nbsp;
                    2020-06-30
                </div>
                

                

                
                <div class="info-break-policy">
                    <i class="far fa-file-word fa-fw"></i>文章字数:&nbsp;&nbsp;
                    5k
                </div>
                

                

                
                    <div id="busuanzi_container_page_pv" class="info-break-policy">
                        <i class="far fa-eye fa-fw"></i>阅读次数:&nbsp;&nbsp;
                        <span id="busuanzi_value_page_pv"></span>
                    </div>
				
            </div>
        </div>
        <hr class="clearfix">
        <div class="card-content article-card-content">
            <div id="articleContent">
                <h2 id="项目地址传送门，欢迎-star-和-fork-！"><a href="#项目地址传送门，欢迎-star-和-fork-！" class="headerlink" title="项目地址传送门，欢迎 star 和 fork ！"></a>项目地址<a href="https://github.com/DongZhouGu/scikit-learn-ml" target="_blank" rel="noopener">传送门</a>，欢迎 star 和 fork ！</h2><h2 id="1-Logistic-回归概述"><a href="#1-Logistic-回归概述" class="headerlink" title="1. Logistic 回归概述"></a>1. Logistic 回归概述</h2><p>Logistic 回归 或者叫逻辑回归，虽然名字有回归，但是它是用来做分类的。其主要思想是: 根据现有数据对分类边界线 (Decision Boundary) 建立回归公式，以此进行分类。</p>
<h2 id="2-算法原理"><a href="#2-算法原理" class="headerlink" title="2. 算法原理"></a>2. 算法原理</h2><p>假设有一场足球赛，我们有两支球队的所有出场球员信息、历史交锋成绩、比赛时间、主客场、裁判和天气等信息，根据这些信息预测球队的输赢。假设比赛结果记为y，赢球标记为1，输球标记为0，这就是典型的二元分类问题，可以用逻辑回归算法来解决。</p>
<p>与线性回归算法的最大区别是，逻辑回归算法的输出是个离散值。</p>
<h3 id="2-1-预测函数"><a href="#2-1-预测函数" class="headerlink" title="2.1 预测函数"></a>2.1 预测函数</h3><p>需要找出一个预测函数模型，使其值输出在[0,1]之间。然后选择一个基准值，如0.5，如果算出来的预测值大于0.5，就认为其预测值为1，反之，则其预测值为0。</p>
<p>选择Sigmoid函数（也称为Logistic函数，逻辑回归的名字由此而来）<br>$$<br>g(z)=\frac{1}{1+e^{-z}}<br>$$<br>来作为预测函数，其中e是自然对数的底数。以z为横坐标，以g(z)为纵坐标，画出的图形如下所示：</p>
<p><img src="/medias/loading.gif" data-original="https://cdn.jsdelivr.net/gh/dongzhougu/imageuse1/LR_3.png" alt="Sigmoid 函数在不同坐标下的图片"></p>
<p>从图中可以看出，当z=0时，g(z)=0.5；当z&gt;0时，g(z)&gt;0.5，当z越来越大时，g(z)无限接近于1；当z&lt;0时，g(z)&lt;0.5，当z越来越小时，g(z)无限接近于0。这正是我们想要的针对二元分类算法的预测函数。</p>
<h3 id="2-2-判定边界"><a href="#2-2-判定边界" class="headerlink" title="2.2 判定边界"></a>2.2 判定边界</h3><p>逻辑回归算法的预测函数由下面两个公式给出：<br>$$<br>h_{\theta}(x)=g\left(\theta^{T} x\right)<br>$$</p>
<p>$$<br>g(z)=\frac{1}{1+e^{-z}}<br>$$</p>
<p>下面给出两个判定边界的例子。假设有两个变量x1，x2，其逻辑回归预测函数是$h_{\theta}(x)=g\left(\theta_{0}+\theta_{1} x_{1}+\theta_{2} x_{2}\right)$</p>
<p>假设给定参数：<br>$$<br>\theta=\left[\begin{array}{c}<br>-3 \\<br>1 \\<br>1<br>\end{array}\right]<br>$$<br>那么，可以得到判定边界 $-3+x_{1}+x_{2}=0$，如果以 $x_{1}$ 为横坐标， $x_{2}$  为纵坐标，则这个函数画出来就是一条通过(0,3)和(3,0)两点的直线。这条线就是判定边界，其中，直线左下方为y=0，直线右上方为y=1，如图所示：</p>
<p><img src="/medias/loading.gif" data-original="https://cdn.jsdelivr.net/gh/dongzhougu/imageuse1/17634123-0bf913a36c2847a8.png" alt="判定边界示意图"></p>
<p>如果预测函数是多项式 $h_{\theta}(x)=g\left(\theta_{0}+\theta_{1} x_{1}+\theta_{2} x_{2}+\theta_{3} x_{1}^{2}+\theta_{4} x_{2}^{2}\right)$，且给定</p>
<p>$$<br>\theta=\left[\begin{array}{c}<br>-1 \\<br>0 \\<br>0 \\<br>1 \\<br>1<br>\end{array}\right]<br>$$<br>则可以得到判定边界函数 $x_{1}^{2}+x_{2}^{2}=1$，这是一个半径为1的圆。圆内部是y=0，圆外部是y=1，如上图所示。</p>
<h3 id="2-3-损失函数"><a href="#2-3-损失函数" class="headerlink" title="2.3 损失函数"></a>2.3 损失函数</h3><p>我们不能使用线性回归模型的损失函数来推导逻辑回归的损失函数，因为那样的损失函数太复杂，最终很可能会导致无法通过迭代找到损失函数值最小的点。</p>
<p>为了容易地求出损失函数的最小值，我们分成 y=1 和 y=0 两种情况来分别考虑其预测值和真实值的误差。我们先考虑最简单的情况，即计算某个样本 x，y=1 和 y=0 两种情况下的预测值与真实值的误差，我们选择的损失公式如下：</p>
<p>$\operatorname{cost}\left(h_{\theta}(x), y\right)=\left\{\begin{array}{ccc}-\log \left(h_{\theta}(x)\right), &amp; \text { if } &amp; y=1 \\ -\log \left(1-h_{\theta}(x)\right), &amp; \text { if } &amp; y=0\end{array}\right.$</p>
<p>其中， $h_{\theta}(x)$ 表示预测为1的概率，log(x)为自然对数。如图所示</p>
<p><img src="/medias/loading.gif" data-original="https://cdn.jsdelivr.net/gh/dongzhougu/imageuse1/image-20200630141514106.png" alt="y=1（左）与 y=0（右）两种情况下的损失函数曲线"></p>
<p>根据损失函数的定义，损失是预测值与真实值的差异。当差异越大时，损失越大，模型受到的“惩罚”也越严重。在左图中，当 y=1 时，随着 $h_{\theta}(x)$（预测为1的概率）越来越大，预测值越来越接近真实值，其损失越来越小；在右图中，当 y=0 时，随着 $h_{\theta}(x)$（预测为1的概率）越来越大，预测值越来越偏离真实值，其损失越来越大。</p>
<h3 id="2-4-梯度下降算法"><a href="#2-4-梯度下降算法" class="headerlink" title="2.4 梯度下降算法"></a>2.4 梯度下降算法</h3><p>和线性回归类似，这里使用梯度下降算法来求解逻辑回归模型参数。具体可见上一节 <a href="https://dongzhougu.github.io/2020/06/30/scikit-learn-xi-lie-san-xian-xing-hui-gui/">线性回归算法</a>。</p>
<h2 id="3-多元分类"><a href="#3-多元分类" class="headerlink" title="3. 多元分类"></a>3. 多元分类</h2><p>逻辑回归模型可以解决二元分类问题，即 y={0,1}，能不能解决多元分类问题呢？答案是肯定的。针对多元分类问题，y={0,1,2,3,…,n}，总共有n+1个类别。其解决思路是：首先把问题转换为二元分类问题，即y=0是一个类别，y={1,2,3,…,n}作为另外一个类别，然后计算这两个类别的概率；接着，把y=1作为一个类别，把y={0,2,3,…,n}作为另外一个类别，再计算这两个类别的概率。依此类推，总共需要 n+1 个预测函数；预测时，取概率最高的那个类别作为样本所属的类别。</p>
<h2 id="4-正则化"><a href="#4-正则化" class="headerlink" title="4. 正则化"></a>4. 正则化</h2><p>我们知道，过拟合是指模型很好地拟合了训练样本，但对新数据预测的准确性很差，这是因为模型太复杂了。解决办法是减少输入特征的个数，或者获取更多的训练样本。这里介绍的正则化也可以用来解决过拟合问题：</p>
<ul>
<li><p>保留所有的特征，减少特征的权重 $\theta_{j} $ 的值。确保所有的特征对预测值都有少量的贡献。</p>
</li>
<li><p>当每个特征 $x_{j} $ 对预测值y都有少量的贡献时，这样的模型可以良好的工作，这正是正则化的目的，可以用它来解决特征过多时的过拟合问题。</p>
</li>
</ul>
<h3 id="4-1-线性回归模型正则化"><a href="#4-1-线性回归模型正则化" class="headerlink" title="4.1 线性回归模型正则化"></a>4.1 线性回归模型正则化</h3><p>我们先来看线性回归模型的损失函数是如何正则化的：<br>$$<br>J(\theta)=\frac{1}{2 m} \sum_{i=1}^{m}\left(h_{\theta}\left(x^{(i)}\right)-y^{(i)}\right)^{2}+\lambda \sum_{j=1}^{n} \theta_{j}^{2}<br>$$<br>公式中前半部分就是原来的线性回归模型的损失函数，也称为预测值与实际值的误差。后半部分为加入的正则项。其中 $\lambda $ 的值有两个目的，既要维持对训练样本的拟合，又要避免对训练样本的过拟合。如果  $\lambda $  的值太大，则能确保不出现过拟合，但可能会导致对现有训练样本出现欠拟合。</p>
<h3 id="4-2-线性回归模型正则化"><a href="#4-2-线性回归模型正则化" class="headerlink" title="4.2 逻辑回归模型正则化"></a>4.2 逻辑回归模型正则化</h3><p>同样，可以对逻辑回归模型的损失函数进行正则化，其方法也是在原来的损失函数的基础上加上正则项：<br>$$<br>J(\theta)=-\frac{1}{m}\left[\sum_{i=1}^{m} y^{(i)} \log \left(h_{\theta}\left(x^{(i)}\right)\right)+\left(1-y^{(i)}\right) \log \left(1-h_{\theta}\left(x^{(i)}\right)\right)\right]+\frac{\lambda}{2 m} \sum_{j=1}^{n} \theta_{j}^{2}<br>$$</p>
<h2 id="5-算法参数"><a href="#5-算法参数" class="headerlink" title="5. 算法参数"></a>5. 算法参数</h2><p>在 <code>scikit-learn</code>里，逻辑回归模型由类 <code>sklearn.linear_model.LogisticRegression</code>实现。</p>
<h3 id="5-1-正则项权重"><a href="#5-1-正则项权重" class="headerlink" title="5.1 正则项权重"></a>5.1 正则项权重</h3><p>上面介绍的正则项权重  $\lambda $  ，在<code>LogisticRegression</code>类里有个参数 C 与之对应，但成反比。即 C 值越大，  $\lambda $ 越小，模型容易出现过拟合；C 值越小，  $\lambda $  越大，模型容易出现欠拟合。</p>
<h3 id="5-2-L1-L2范数"><a href="#5-2-L1-L2范数" class="headerlink" title="5.2 L1/L2范数"></a>5.2 L1/L2范数</h3><p>创建逻辑回归模型时，有个参数penalty（惩罚），其取值有“l1”或“l2”</p>
<ul>
<li>L1范数作为正则项，会让模型参数 $\theta$ 稀疏化，即让模型参数向量里的0元素尽可能多，只保留模型参数向量中重要特征的贡献。</li>
<li>L2范数作为正则项，则让模型参数尽量小，但不会为0，即尽量让每个特征对应预测值都有一些小的贡献。</li>
</ul>
<p>假设模型只有两个参数，它们构成一个二维向量 $\theta=\left[\theta_{1}, \theta_{2}\right]$,则L1范数为：</p>
<p>$\|\theta\|_{1}=\left|\theta_{1}\right|+\left|\theta_{2}\right|$</p>
<p>即L1范数是向量里元素的绝对值之和。L2范数为向量里所有元素的平方和的算术平方根：</p>
<p>$\|\theta\|_{2}=\sqrt{\theta_{1}^{2}+\theta_{2}^{2}}$</p>
<p>我们知道，梯度下降算法在参数迭代的过程中，实际上是在损失函数的等高线上跳跃，并最终收敛在误差最小的点上。那么正则项的本质是什么？正则项的本质是惩罚。在参数迭代的过程中，如果没有遵循正则项所表达的规则，那么其损失会变大，即受到了惩罚，从而往正则项所表达的规则处收敛。正则化后的模型参数应该收敛在误差等高线与正则项等高线相切的点上。</p>
<p>作为推论，L1范数作为正则项，有以下几个用途：</p>
<ul>
<li>选择重要特征：L1范数会让模型参数向量里的元素为0的点尽量多，这样可以排除掉那些对预测值没有什么影响的特征，从而简化问题。所以L1范数解决过拟合，实际上是减少特征数量。</li>
<li>模型可解释性好：模型参数向量稀疏化后，只会留下那些对预测值有重要影响的特征。这样我们就容易解释模型的因果关系。比如，针对某种癌症的筛查，如果有100个特征，那么我们无从解释到底哪些特征对阳性呈关键作用。稀疏化后，只留下几个关键的特征，就容易看到因果关系。</li>
</ul>
<p>由此可见，L1范数作为正则项，更多的是一个分析工具，而不适合用来对模型求解。因为它会把不重要的特征直接去除。大部分的情况下解决过拟合问题，还是选择L2范数作为正则项，这也是 <code>scikit-learn</code> 里的默认值。</p>
<h2 id="6-示例：乳腺癌检测"><a href="#6-示例：乳腺癌检测" class="headerlink" title="6 示例：乳腺癌检测"></a>6 示例：乳腺癌检测</h2><p>本节来看一个实例，使用逻辑回归算法解决乳腺癌检测问题。我们需要先采集肿瘤病灶造影图片，然后对图片进行分析，从图片中提取特征，再根据特征来训练模型。最终使用模型来检测新采集到的肿瘤病灶造影，以便判断肿瘤是良性的还是恶性的。这是个典型的二元分类问题。</p>
<h3 id="6-1-数据采集及特征提取"><a href="#6-1-数据采集及特征提取" class="headerlink" title="6.1 数据采集及特征提取"></a>6.1 数据采集及特征提取</h3><p>为了简单起见，直接加载 <code>scikit-learn</code> 自带的一个乳腺癌数据集。这个数据集是已经采集后的数据：</p>
<pre class="line-numbers language-python"><code class="language-python"><span class="token keyword">from</span> sklearn<span class="token punctuation">.</span>datasets <span class="token keyword">import</span> load_breast_cancer
cancer <span class="token operator">=</span> load_breast_cancer<span class="token punctuation">(</span><span class="token punctuation">)</span>
X <span class="token operator">=</span> cancer<span class="token punctuation">.</span>data
y <span class="token operator">=</span> cancer<span class="token punctuation">.</span>target
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'data shape: {0}; no. positive: {1}; no. negative: {2}'</span>
      <span class="token punctuation">.</span>format<span class="token punctuation">(</span>X<span class="token punctuation">.</span>shape<span class="token punctuation">,</span>y<span class="token punctuation">[</span>y<span class="token operator">==</span><span class="token number">1</span><span class="token punctuation">]</span><span class="token punctuation">.</span>shape<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">,</span>y<span class="token punctuation">[</span>y<span class="token operator">==</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">.</span>shape<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>cancer<span class="token punctuation">.</span>data<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
<p>输出如下：</p>
<pre class="line-numbers language-python"><code class="language-python">data shape<span class="token punctuation">:</span> <span class="token punctuation">(</span><span class="token number">569</span><span class="token punctuation">,</span> <span class="token number">30</span><span class="token punctuation">)</span><span class="token punctuation">;</span> no<span class="token punctuation">.</span> positive<span class="token punctuation">:</span> <span class="token number">357</span><span class="token punctuation">;</span> no<span class="token punctuation">.</span> negative<span class="token punctuation">:</span> <span class="token number">212</span>
<span class="token punctuation">[</span><span class="token number">1.799e+01</span> <span class="token number">1.038e+01</span> <span class="token number">1.228e+02</span> <span class="token number">1.001e+03</span> <span class="token number">1.184e-01</span> <span class="token number">2.776e-01</span> <span class="token number">3.001e-01</span>
 <span class="token number">1.471e-01</span> <span class="token number">2.419e-01</span> <span class="token number">7.871e-02</span> <span class="token number">1.095e+00</span> <span class="token number">9.053e-01</span> <span class="token number">8.589e+00</span> <span class="token number">1.534e+02</span>
 <span class="token number">6.399e-03</span> <span class="token number">4.904e-02</span> <span class="token number">5.373e-02</span> <span class="token number">1.587e-02</span> <span class="token number">3.003e-02</span> <span class="token number">6.193e-03</span> <span class="token number">2.538e+01</span>
 <span class="token number">1.733e+01</span> <span class="token number">1.846e+02</span> <span class="token number">2.019e+03</span> <span class="token number">1.622e-01</span> <span class="token number">6.656e-01</span> <span class="token number">7.119e-01</span> <span class="token number">2.654e-01</span>
 <span class="token number">4.601e-01</span> <span class="token number">1.189e-01</span><span class="token punctuation">]</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
<p>数据集中总共有569个样本，每个样本有30个特征，其中357个阳性（y=1）样本，212个阴性（y=0）样本。同时，还打印出一个样本数据，以便直观地进行观察。</p>
<p>这30个特征是怎么来的呢？这个数据集总共从病灶造影图片中提取了以下10个关键属性：</p>
<ul>
<li>radius：半径，即病灶中心点离边界的平均距离。</li>
<li>texture：纹理，灰度值的标准偏差。</li>
<li>perimeter：周长，即病灶的大小。</li>
<li>area：面积，也是反映病灶大小的一个指标。</li>
<li>smoothness：平滑度，即半径的变化幅度。</li>
<li>compactness：密实度，周长的平方除以面积，再减去1。</li>
<li>concavity：凹度，凹陷部分轮廓的严重程度。</li>
<li>concave points：凹点，凹陷轮廓的数量。</li>
<li>symmetry：对称性。</li>
<li>fractal dimension：分形维度。</li>
</ul>
<p>实际上它只关注10个特征，然后又构造出了每个特征的标准差及最大值，这样每个特征就衍生出了两个特征，所以总共就有了30个特征。可以通过 <code>cancer.feature_names</code> 变量来查看这些特征的名称。</p>
<h3 id="6-2-模型训练"><a href="#6-2-模型训练" class="headerlink" title="6.2 模型训练"></a>6.2 模型训练</h3><p>首先，把数据集分成训练数据集和测试数据集：</p>
<pre class="line-numbers language-python"><code class="language-python"><span class="token keyword">from</span> sklearn<span class="token punctuation">.</span>model_selection <span class="token keyword">import</span> train_test_split
X_train<span class="token punctuation">,</span>X_test<span class="token punctuation">,</span>y_train<span class="token punctuation">,</span>y_test <span class="token operator">=</span> train_test_split<span class="token punctuation">(</span>X<span class="token punctuation">,</span>y<span class="token punctuation">,</span>test_size<span class="token operator">=</span><span class="token number">0.2</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span></span></code></pre>
<p>然后使用 <code>LogisticRegression</code> 模型来训练，并计算训练数据集的评分数据和测试数据集的评分数据：</p>
<pre class="line-numbers language-python"><code class="language-python"><span class="token keyword">from</span> sklearn<span class="token punctuation">.</span>linear_model <span class="token keyword">import</span> LogisticRegression
model <span class="token operator">=</span> LogisticRegression<span class="token punctuation">(</span><span class="token punctuation">)</span>
model<span class="token punctuation">.</span>fit<span class="token punctuation">(</span>X_train<span class="token punctuation">,</span>y_train<span class="token punctuation">)</span>
train_score <span class="token operator">=</span> model<span class="token punctuation">.</span>score<span class="token punctuation">(</span>X_train<span class="token punctuation">,</span>y_train<span class="token punctuation">)</span>
test_score <span class="token operator">=</span> model<span class="token punctuation">.</span>score<span class="token punctuation">(</span>X_test<span class="token punctuation">,</span>y_test<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'train score: {train_score:.6f}; test_score:{test_score:.6f}'</span>
      <span class="token punctuation">.</span>format<span class="token punctuation">(</span>train_score<span class="token operator">=</span>train_score<span class="token punctuation">,</span>
             test_score<span class="token operator">=</span>test_score<span class="token punctuation">)</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
<p>输出如下：</p>
<pre class="line-numbers language-css"><code class="language-css">train <span class="token property">score</span><span class="token punctuation">:</span> <span class="token number">0.940659</span><span class="token punctuation">;</span> <span class="token property">test_score</span><span class="token punctuation">:</span><span class="token number">0.964912</span><span aria-hidden="true" class="line-numbers-rows"><span></span></span></code></pre>
<p>观察模型在测试样本集的表现：</p>
<pre class="line-numbers language-python"><code class="language-python"><span class="token keyword">import</span> numpy <span class="token keyword">as</span> np
y_pred <span class="token operator">=</span> model<span class="token punctuation">.</span>predict<span class="token punctuation">(</span>X_test<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'matchs: {0}/{1}'</span><span class="token punctuation">.</span>format<span class="token punctuation">(</span>np<span class="token punctuation">.</span>sum<span class="token punctuation">(</span>np<span class="token punctuation">.</span>equal<span class="token punctuation">(</span>y_pred<span class="token punctuation">,</span>y_test<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">,</span>y_test<span class="token punctuation">.</span>shape<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span></span></code></pre>
<p>输出如下：</p>
<pre class="line-numbers language-python"><code class="language-python">matchs<span class="token punctuation">:</span> <span class="token number">110</span><span class="token operator">/</span><span class="token number">114</span><span aria-hidden="true" class="line-numbers-rows"><span></span></span></code></pre>
<p>总共114个测试样本，其中110个预测正确、4个预测错误。这与 <code>test_score</code> 为 0.964912 是一致的：对于分类模型，<code>scikit-learn</code> 的 <code>score()</code> 方法返回的是平均准确率，即预测正确的样本数占总样本数的比例（110/114≈0.964912）。需要注意的是，统计预测正确的样本数时要对 <code>np.equal()</code> 返回的布尔数组求和；如果直接取它的 <code>shape[0]</code>，得到的永远是样本总数。不过，准确率只能说明预测的类别是否正确，并不能反映模型对每个预测的把握程度，这就需要借助预测概率数据来观察。</p>
<p>针对二元分类问题，<code>LogisticRegression</code>模型会对每个样本输出两个概率，即为 0 的概率和为 1 的概率，哪个概率高就预测为哪个类别。</p>
<p>找出测试数据集中预测“自信度”低于90%的样本。这里先计算出测试数据集里的每个样本的预测概率数据，针对每个样本，它会有两个数据，一是预测其为阳性的概率，另外一个是预测其为阴性的概率。接着找出预测为阴性的概率大于0.1且小于0.9的样本（同时也是预测为阳性的概率大于0.1小于0.9），这些样本就是“自信度”不足90%的样本。</p>
<pre class="line-numbers language-python"><code class="language-python"><span class="token comment" spellcheck="true"># 预测概率：找出预测概率低于 90% 的样本</span>
y_pred_proba <span class="token operator">=</span> model<span class="token punctuation">.</span>predict_proba<span class="token punctuation">(</span>X_test<span class="token punctuation">)</span>  <span class="token comment" spellcheck="true"># 计算每个测试样本的预测概率</span>
<span class="token comment" spellcheck="true"># 找出第一列，即预测为阴性的概率大于 0.1 的样本，保存在 result 里</span>
y_pred_proba_0 <span class="token operator">=</span> y_pred_proba<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">]</span> <span class="token operator">></span> <span class="token number">0.1</span>
result <span class="token operator">=</span> y_pred_proba<span class="token punctuation">[</span>y_pred_proba_0<span class="token punctuation">]</span>
<span class="token comment" spellcheck="true"># 在 result 结果集里，找出第二列，即预测为阳性的概率大于 0.1 的样本</span>
y_pred_proba_1 <span class="token operator">=</span> result<span class="token punctuation">[</span><span class="token punctuation">:</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">]</span> <span class="token operator">></span> <span class="token number">0.1</span>
<span class="token keyword">print</span><span class="token punctuation">(</span>result<span class="token punctuation">[</span>y_pred_proba_1<span class="token punctuation">]</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
<p>输出如下：</p>
<pre class="line-numbers language-python"><code class="language-python"><span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token number">0.29623162</span> <span class="token number">0.70376838</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.54660262</span> <span class="token number">0.45339738</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.17874247</span> <span class="token number">0.82125753</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.20917573</span> <span class="token number">0.79082427</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.10943452</span> <span class="token number">0.89056548</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.35503614</span> <span class="token number">0.64496386</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.23849987</span> <span class="token number">0.76150013</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.13634228</span> <span class="token number">0.86365772</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.80171734</span> <span class="token number">0.19828266</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.21744759</span> <span class="token number">0.78255241</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.81346356</span> <span class="token number">0.18653644</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.2225791</span>  <span class="token number">0.7774209</span> <span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.10788007</span> <span class="token number">0.89211993</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.88068005</span> <span class="token number">0.11931995</span><span class="token punctuation">]</span>
 <span class="token punctuation">[</span><span class="token number">0.18189724</span> <span class="token number">0.81810276</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
<p>由此可见，计算预测概率使用model.predict_proba()函数，而计算预测分类用model.predict()函数。</p>
<h3 id="6-3-模型优化"><a href="#6-3-模型优化" class="headerlink" title="6.3 模型优化"></a>6.3 模型优化</h3><p>首先，使用Pipeline来增加多项式特征：</p>
<pre class="line-numbers language-python"><code class="language-python"><span class="token keyword">from</span> sklearn<span class="token punctuation">.</span>linear_model <span class="token keyword">import</span> LogisticRegression
<span class="token keyword">from</span> sklearn<span class="token punctuation">.</span>preprocessing <span class="token keyword">import</span> PolynomialFeatures
<span class="token keyword">from</span> sklearn<span class="token punctuation">.</span>pipeline <span class="token keyword">import</span> Pipeline

<span class="token comment" spellcheck="true"># 增加多项式预处理</span>
<span class="token keyword">def</span> <span class="token function">polynomial_model</span><span class="token punctuation">(</span>degree<span class="token operator">=</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token operator">**</span>kwarg<span class="token punctuation">)</span><span class="token punctuation">:</span>
    polynomial_features <span class="token operator">=</span> PolynomialFeatures<span class="token punctuation">(</span>degree<span class="token operator">=</span>degree<span class="token punctuation">,</span>
                                             include_bias<span class="token operator">=</span><span class="token boolean">False</span><span class="token punctuation">)</span>
    logistic_regression <span class="token operator">=</span> LogisticRegression<span class="token punctuation">(</span><span class="token operator">**</span>kwarg<span class="token punctuation">)</span>
    pipeline <span class="token operator">=</span> Pipeline<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token punctuation">(</span><span class="token string">"polynomial_features"</span><span class="token punctuation">,</span> polynomial_features<span class="token punctuation">)</span><span class="token punctuation">,</span>
                         <span class="token punctuation">(</span><span class="token string">"logistic_regression"</span><span class="token punctuation">,</span> logistic_regression<span class="token punctuation">)</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
    <span class="token keyword">return</span> pipeline<span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
<p>接着，增加二阶多项式特征，创建并训练模型：</p>
<pre class="line-numbers language-python"><code class="language-python"><span class="token keyword">import</span> time
model <span class="token operator">=</span> polynomial_model<span class="token punctuation">(</span>degree<span class="token operator">=</span><span class="token number">2</span><span class="token punctuation">,</span> penalty<span class="token operator">=</span><span class="token string">'l1'</span><span class="token punctuation">,</span> solver<span class="token operator">=</span><span class="token string">'liblinear'</span><span class="token punctuation">)</span>
start <span class="token operator">=</span> time<span class="token punctuation">.</span>process_time<span class="token punctuation">(</span><span class="token punctuation">)</span>
model<span class="token punctuation">.</span>fit<span class="token punctuation">(</span>X_train<span class="token punctuation">,</span> y_train<span class="token punctuation">)</span>
train_score <span class="token operator">=</span> model<span class="token punctuation">.</span>score<span class="token punctuation">(</span>X_train<span class="token punctuation">,</span> y_train<span class="token punctuation">)</span>
test_score <span class="token operator">=</span> model<span class="token punctuation">.</span>score<span class="token punctuation">(</span>X_test<span class="token punctuation">,</span> y_test<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'elaspe: {0:.6f}; train_score: {1:0.6f}; cv_score: {2:.6f}'</span><span class="token punctuation">.</span>format<span class="token punctuation">(</span>
    time<span class="token punctuation">.</span>process_time<span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token operator">-</span> start<span class="token punctuation">,</span> train_score<span class="token punctuation">,</span> test_score<span class="token punctuation">)</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
<p>使用L1范数作为正则项（参数 <code>penalty='l1'</code>），输出如下：</p>
<pre class="line-numbers language-python"><code class="language-python">elaspe<span class="token punctuation">:</span> <span class="token number">0.156250</span><span class="token punctuation">;</span> train_score<span class="token punctuation">:</span> <span class="token number">1.000000</span><span class="token punctuation">;</span> cv_score<span class="token punctuation">:</span> <span class="token number">0.956140</span><span aria-hidden="true" class="line-numbers-rows"><span></span></span></code></pre>
<p>可以看到，训练数据集评分提高到了1.0，而测试数据集评分与之前基本持平（0.956140 对比 0.964912）。为什么使用L1范数作为正则项呢？L1范数作为正则项可以实现参数的稀疏化，即自动选择出那些对模型有关联的重要特征。</p>
<pre class="line-numbers language-python"><code class="language-python">logistic_regression <span class="token operator">=</span> model<span class="token punctuation">.</span>named_steps<span class="token punctuation">[</span><span class="token string">'logistic_regression'</span><span class="token punctuation">]</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'model parameters shape: {0}; count of non-zero element: {1}'</span><span class="token punctuation">.</span>format<span class="token punctuation">(</span>
    logistic_regression<span class="token punctuation">.</span>coef_<span class="token punctuation">.</span>shape<span class="token punctuation">,</span> 
    np<span class="token punctuation">.</span>count_nonzero<span class="token punctuation">(</span>logistic_regression<span class="token punctuation">.</span>coef_<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span></span></code></pre>
<p>输出如下：</p>
<pre class="line-numbers language-python"><code class="language-python">model parameters shape<span class="token punctuation">:</span> <span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">495</span><span class="token punctuation">)</span><span class="token punctuation">;</span> count of non<span class="token operator">-</span>zero element<span class="token punctuation">:</span> <span class="token number">110</span><span aria-hidden="true" class="line-numbers-rows"><span></span></span></code></pre>
<p>逻辑回归模型的coef_属性里保存的就是模型参数。从输出结果可以看到，增加二阶多项式特征后，输入特征由原来的30个增加到了495个，最终大多数特征都被丢弃，只保留了110个有效特征。</p>
<h3 id="6-4-学习曲线"><a href="#6-4-学习曲线" class="headerlink" title="6.4 学习曲线"></a>6.4 学习曲线</h3><p>首先画出使用L1范数作为正则项所对应的一阶和二阶多项式的学习曲线：</p>
<pre class="line-numbers language-python"><code class="language-python"><span class="token keyword">from</span> utils <span class="token keyword">import</span> plot_learning_curve
<span class="token keyword">from</span> sklearn<span class="token punctuation">.</span>model_selection <span class="token keyword">import</span> ShuffleSplit
<span class="token keyword">from</span> matplotlib <span class="token keyword">import</span> pyplot <span class="token keyword">as</span> plt

cv <span class="token operator">=</span> ShuffleSplit<span class="token punctuation">(</span>n_splits<span class="token operator">=</span><span class="token number">10</span><span class="token punctuation">,</span> test_size<span class="token operator">=</span><span class="token number">0.2</span><span class="token punctuation">,</span> random_state<span class="token operator">=</span><span class="token number">0</span><span class="token punctuation">)</span>
title <span class="token operator">=</span> <span class="token string">'Learning Curves (degree={0}, penalty={1})'</span>
degrees <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">]</span>
penalty <span class="token operator">=</span> <span class="token string">'l1'</span>

start <span class="token operator">=</span> time<span class="token punctuation">.</span>process_time<span class="token punctuation">(</span><span class="token punctuation">)</span>
plt<span class="token punctuation">.</span>figure<span class="token punctuation">(</span>figsize<span class="token operator">=</span><span class="token punctuation">(</span><span class="token number">12</span><span class="token punctuation">,</span> <span class="token number">4</span><span class="token punctuation">)</span><span class="token punctuation">,</span> dpi<span class="token operator">=</span><span class="token number">144</span><span class="token punctuation">)</span>
<span class="token keyword">for</span> i <span class="token keyword">in</span> range<span class="token punctuation">(</span>len<span class="token punctuation">(</span>degrees<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    plt<span class="token punctuation">.</span>subplot<span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">,</span> len<span class="token punctuation">(</span>degrees<span class="token punctuation">)</span><span class="token punctuation">,</span> i <span class="token operator">+</span> <span class="token number">1</span><span class="token punctuation">)</span>
    plot_learning_curve<span class="token punctuation">(</span>plt<span class="token punctuation">,</span> polynomial_model<span class="token punctuation">(</span>degree<span class="token operator">=</span>degrees<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">,</span> penalty<span class="token operator">=</span>penalty<span class="token punctuation">,</span> solver<span class="token operator">=</span><span class="token string">'liblinear'</span><span class="token punctuation">,</span> max_iter<span class="token operator">=</span><span class="token number">300</span><span class="token punctuation">)</span><span class="token punctuation">,</span> 
                        title<span class="token punctuation">.</span>format<span class="token punctuation">(</span>degrees<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">,</span> penalty<span class="token punctuation">)</span><span class="token punctuation">,</span> X<span class="token punctuation">,</span> y<span class="token punctuation">,</span> ylim<span class="token operator">=</span><span class="token punctuation">(</span><span class="token number">0.8</span><span class="token punctuation">,</span> <span class="token number">1.01</span><span class="token punctuation">)</span><span class="token punctuation">,</span> cv<span class="token operator">=</span>cv<span class="token punctuation">)</span>

<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'elaspe: {0:.6f}'</span><span class="token punctuation">.</span>format<span class="token punctuation">(</span>time<span class="token punctuation">.</span>process_time<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token operator">-</span>start<span class="token punctuation">)</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
<p>输出的结果如下：</p>
<pre class="line-numbers language-python"><code class="language-python">l1_elaspe<span class="token punctuation">:</span> <span class="token number">10.781250</span><span aria-hidden="true" class="line-numbers-rows"><span></span></span></code></pre>
<p>L1范数学习曲线如下图所示：</p>
<p><img src="/medias/loading.gif" data-original="https://cdn.jsdelivr.net/gh/dongzhougu/imageuse1/17634123-08d8944e88c2ae64.png" alt="使用L1范数作为正则项的一阶和二阶多项式学习曲线"></p>
<p>接着画出使用L2范数作为正则项所对应的一阶和二阶多项式的学习曲线：</p>
<pre class="line-numbers language-python"><code class="language-python"><span class="token keyword">import</span> warnings
warnings<span class="token punctuation">.</span>filterwarnings<span class="token punctuation">(</span><span class="token string">"ignore"</span><span class="token punctuation">)</span>

penalty <span class="token operator">=</span> <span class="token string">'l2'</span>

start <span class="token operator">=</span> time<span class="token punctuation">.</span>process_time<span class="token punctuation">(</span><span class="token punctuation">)</span>
plt<span class="token punctuation">.</span>figure<span class="token punctuation">(</span>figsize<span class="token operator">=</span><span class="token punctuation">(</span><span class="token number">12</span><span class="token punctuation">,</span> <span class="token number">4</span><span class="token punctuation">)</span><span class="token punctuation">,</span> dpi<span class="token operator">=</span><span class="token number">144</span><span class="token punctuation">)</span>
<span class="token keyword">for</span> i <span class="token keyword">in</span> range<span class="token punctuation">(</span>len<span class="token punctuation">(</span>degrees<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    plt<span class="token punctuation">.</span>subplot<span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">,</span> len<span class="token punctuation">(</span>degrees<span class="token punctuation">)</span><span class="token punctuation">,</span> i <span class="token operator">+</span> <span class="token number">1</span><span class="token punctuation">)</span>
    plot_learning_curve<span class="token punctuation">(</span>plt<span class="token punctuation">,</span> polynomial_model<span class="token punctuation">(</span>degree<span class="token operator">=</span>degrees<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">,</span> penalty<span class="token operator">=</span>penalty<span class="token punctuation">,</span> solver<span class="token operator">=</span><span class="token string">'lbfgs'</span><span class="token punctuation">)</span><span class="token punctuation">,</span> 
                        title<span class="token punctuation">.</span>format<span class="token punctuation">(</span>degrees<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">,</span> penalty<span class="token punctuation">)</span><span class="token punctuation">,</span> X<span class="token punctuation">,</span> y<span class="token punctuation">,</span> ylim<span class="token operator">=</span><span class="token punctuation">(</span><span class="token number">0.8</span><span class="token punctuation">,</span> <span class="token number">1.01</span><span class="token punctuation">)</span><span class="token punctuation">,</span> cv<span class="token operator">=</span>cv<span class="token punctuation">)</span>

<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'elaspe: {0:.6f}'</span><span class="token punctuation">.</span>format<span class="token punctuation">(</span>time<span class="token punctuation">.</span>process_time<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token operator">-</span>start<span class="token punctuation">)</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
<p>输出的结果如下：</p>
<pre class="line-numbers language-python"><code class="language-python">l2_elaspe<span class="token punctuation">:</span> <span class="token number">2.718750</span><span aria-hidden="true" class="line-numbers-rows"><span></span></span></code></pre>
<p>L2范数学习曲线如下图所示：</p>
<p><img src="/medias/loading.gif" data-original="https://cdn.jsdelivr.net/gh/dongzhougu/imageuse1/image-20200630141743010.png" alt="使用L2范数作为正则项的一阶和二阶多项式学习曲线"></p>
<p>可以明显地看出，使用二阶多项式并使用L1范数作为正则项的模型最优，因为它的训练样本评分最高，交叉验证样本评分也最高。从图中还可以看出，训练样本评分和交叉验证样本评分之间的间隙还比较大，我们可以采集更多的数据来训练模型，以便进一步优化模型。</p>
<p>另外从输出的时间可以看出，L1 范数对应的学习曲线，需要花费较长的时间，原因是，<code>scikit-learn</code> 的<code>learning_curve()</code> 函数在画学习曲线的过程中，要对模型进行多次训练，并计算交叉验证样本评分。同时，为了使曲线更平滑，针对每个点还会进行多次计算求平均值。这个就是 <code>ShuffleSplit</code> 类的作用。在我们这个实例里，只有569个训练样本，这是个很小的数据集。如果数据集增加100倍，甚至1000倍，拿出来画学习曲线将是场灾难。</p>
<p>那么，针对大数据集，怎样高效地画学习曲线？答案很简单，可以从大数据集里选择一小部分数据来画学习曲线，待选择好最优的模型之后，再使用全部的数据集来训练模型。但是要尽量保持选择出来的这部分数据的标签分布与大数据集的标签分布相同，如针对二元分类，阳性和阴性比例要一致。更直观的说就是，抽取出来的样本集为原来数据集的一个缩影，尽可能相似。</p>
<h2 id="7-拓展阅读"><a href="#7-拓展阅读" class="headerlink" title="7.拓展阅读"></a>7.拓展阅读</h2><p>实际上，我们的预测函数就是写成向量形式的：<br>$$<br>h_{\theta}(x)=g(z)=g\left(\theta^{T} x\right)=\frac{1}{1+e^{-\theta^{T} x}}<br>$$<br>这个预测函数一次只计算一个训练样本的预测值，怎样一次性计算出所有样本的预测值呢？答案是把预测函数的参数写成向量的形式：<br>$$<br>h=g(X \theta)<br>$$<br>其中g(x)为Sigmoid函数。X为m×n的矩阵，即数据集的矩阵表达。损失函数也有对应的矩阵形式：<br>$$<br>J(\theta)=\frac{1}{m}\left(-y^{T} \log (h)-(1-y)^{T} \log (1-h)\right)<br>$$<br>其中，y为目标值向量，h为一次性计算出来的所有样本的预测值。</p>
<script>
        // For each `.github-emoji` element that carries a `data-src`, load that
        // URL through a hidden probe <img>. The probe is discarded as soon as it
        // either loads or fails; on failure the element's colour and background
        // styling are reset.
        for (const emoji of document.querySelectorAll('.github-emoji')) {
          const source = emoji.dataset.src;
          // Elements without a data-src are left untouched.
          if (!source) { continue; }

          const probe = document.createElement('img');
          probe.style = 'display:none !important;';
          probe.src = source;

          // Successful load: the probe has served its purpose.
          probe.addEventListener('load', () => {
            probe.remove();
          });

          // Failed load: drop the probe and clear the element's styling.
          probe.addEventListener('error', () => {
            probe.remove();
            emoji.style.color = 'inherit';
            emoji.style.backgroundImage = 'none';
            emoji.style.background = 'none';
          });

          document.body.appendChild(probe);
        }
      </script>
            </div>
            <hr/>

            

    <div class="reprint" id="reprint-statement">
        
            <div class="reprint__author">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-user">
                        文章作者:
                    </i>
                </span>
                <span class="reprint-info">
                    <a href="/about" rel="external nofollow noreferrer">DongZhou</a>
                </span>
            </div>
            <div class="reprint__type">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-link">
                        文章链接:
                    </i>
                </span>
                <span class="reprint-info">
                    <a href="https://dongzhougu.github.io/2020/06/30/scikit-learn-xi-lie-si-luo-ji-hui-gui/">https://dongzhougu.github.io/2020/06/30/scikit-learn-xi-lie-si-luo-ji-hui-gui/</a>
                </span>
            </div>
            <div class="reprint__notice">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-copyright">
                        版权声明:
                    </i>
                </span>
                <span class="reprint-info">
                    本博客所有文章除特别声明外，均采用
                    <a href="https://creativecommons.org/licenses/by/4.0/deed.zh" rel="external nofollow noreferrer" target="_blank">CC BY 4.0</a>
                    许可协议。转载请注明来源
                    <a href="/about" target="_blank">DongZhou</a>
                    !
                </span>
            </div>
        
    </div>

    <script async defer>
      // Whenever the visitor copies something, show a toast reminding them of the
      // reprint rules. The toast carries a button that jumps to the statement.
      document.addEventListener("copy", function () {
        // The action element is a <button>, so it must be closed with </button>
        // (it was previously closed with a stray </a>).
        let toastHTML = '<span>复制成功，请遵循本文的转载规则</span><button class="btn-flat toast-action" onclick="navToReprintStatement()" style="font-size: smaller">查看</button>';
        M.toast({html: toastHTML})
      });

      // Smoothly scroll to the #reprint-statement box, stopping 80px above it.
      // Must stay a global function: the toast button's inline onclick calls it by name.
      function navToReprintStatement() {
        $("html, body").animate({scrollTop: $("#reprint-statement").offset().top - 80}, 800);
      }
    </script>



            <div class="tag_share" style="display: block;">
                <div class="post-meta__tag-list" style="display: inline-block;">
                    
                        <div class="article-tag">
                            
                                <a href="/tags/%E5%9F%BA%E7%A1%80%E7%9F%A5%E8%AF%86/">
                                    <span class="chip bg-color">基础知识</span>
                                </a>
                            
                                <a href="/tags/ML%E7%AE%97%E6%B3%95/">
                                    <span class="chip bg-color">ML算法</span>
                                </a>
                            
                        </div>
                    
                </div>
                <div class="post_share" style="zoom: 80%; width: fit-content; display: inline-block; float: right; margin: -0.15rem 0;">
                    <link rel="stylesheet" type="text/css" href="/libs/share/css/share.min.css">
<div id="article-share">

    
    <div class="social-share" data-sites="google,qq,qzone,wechat,weibo,douban,linkedin" data-wechat-qrcode-helper="<p>微信扫一扫即可分享！</p>"></div>
    <script src="/libs/share/js/social-share.min.js"></script>
    

    

</div>

                </div>
            </div>
            
                <style>
    /* ---- Reward ("赏") button under the article ---- */
    #reward {
        text-align: center;
        margin: 40px 0;
    }

    #reward .reward-link {
        line-height: 38px;
        font-size: 1.4rem;
    }

    #reward .btn-floating:hover {
        box-shadow: 0 6px 12px rgba(0, 0, 0, 0.2), 0 5px 15px rgba(0, 0, 0, 0.2);
    }

    /* ---- Modal holding the payment QR codes ---- */
    #rewardModal {
        height: 350px;
        width: 320px;
    }

    #rewardModal .reward-title {
        padding-bottom: 5px;
        margin: 15px auto;
    }

    #rewardModal .modal-content {
        padding: 10px;
    }

    /* Close icon pinned to the modal's top-right corner. */
    #rewardModal .close {
        position: absolute;
        top: 15px;
        right: 15px;
        font-size: 1.3rem;
        line-height: 20px;
        color: rgba(0, 0, 0, 0.5);
        cursor: pointer;
    }

    /* Every transform variant carries the same value, so their order does not
       affect the result; the standard property is listed last by convention. */
    #rewardModal .close:hover {
        color: #ef5350;
        -webkit-transform:scale(1.3);
        -moz-transform:scale(1.3);
        -o-transform:scale(1.3);
        transform: scale(1.3);
    }

    /* ---- Alipay / WeChat tab strip ---- */
    #rewardModal .reward-tabs {
        width: 210px;
        margin: 0 auto;
    }

    .reward-tabs .tabs {
        margin: 10px auto;
        padding-left: 0;
        height: 38px;
    }

    .reward-content ul {
        padding-left: 0 !important;
    }

    .reward-tabs .tabs .tab {
        line-height: 38px;
        height: 38px;
    }

    /* Inactive tabs look the same whether hovered or not. */
    .reward-tabs .tab a,
    .reward-tabs .tab a:hover {
        background-color: #ccc;
        color: #fff;
    }

    /* The active tab takes the brand colour of its payment provider. */
    .reward-tabs .wechat-tab .active {
        background-color: #22AB38 !important;
        color: #fff !important;
    }

    .reward-tabs .alipay-tab .active {
        background-color: #019FE8 !important;
        color: #fff !important;
    }

    .reward-tabs .reward-img {
        height: 210px;
        width: 210px;
    }
</style>

<div id="reward">
    <a href="#rewardModal" class="reward-link modal-trigger btn-floating btn-medium waves-effect waves-light red">赏</a>

    <!-- Modal Structure -->
    <div id="rewardModal" class="modal">
        <div class="modal-content">
            <a class="close modal-close"><i class="fas fa-times"></i></a>
            <h4 class="reward-title">你的赏识是我前进的动力</h4>
            <div class="reward-content">
                <div class="reward-tabs">
                    <ul class="tabs row">
                        <li class="tab col s6 alipay-tab waves-effect waves-light"><a href="#alipay">支付宝</a></li>
                        <li class="tab col s6 wechat-tab waves-effect waves-light"><a href="#wechat">微 信</a></li>
                    </ul>
                    <div id="alipay">
                        <img src="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/medias/reward/alipay.jpg" class="reward-img" alt="支付宝打赏二维码">
                    </div>
                    <div id="wechat">
                        <img src="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/medias/reward/wechat.png" class="reward-img" alt="微信打赏二维码">
                    </div>
                </div>
            </div>
        </div>
    </div>
</div>

<script>
    // Once the DOM is ready, initialise every .tabs element through the
    // jQuery .tabs() plugin (used here for the Alipay / WeChat reward tabs).
    $(document).ready(function () {
        var $tabLists = $('.tabs');
        $tabLists.tabs();
    });
</script>

            
        </div>
    </div>

    

    

    

    

    
        <style>
    /* Styles for the Valine comment card and the content rendered inside
       #vcomments.

       This sheet previously repeated several rule sets verbatim and contained
       rules that a later duplicate overrode. Each rule now appears once, with
       the values that actually won the cascade, so rendering is unchanged.

       NOTE(review): some selector lists have an unprefixed second branch
       (`#vcomments ol, ul`, `#vcomments table, th, td`, `table tr:nth-child(2n),
       thead`, `#vcomments b, strong`, ...). That branch matches elements on the
       whole page, not only inside the comment area. They are kept as they were
       because other parts of the page may depend on them; confirm before
       scoping them to #vcomments. */

    .valine-card {
        margin: 1.5rem auto;
    }

    .valine-card .card-content {
        padding: 20px 20px 5px 20px;
    }

    #vcomments textarea {
        box-sizing: border-box;
        background: url("/medias/comment_bg.png") 100% 100% no-repeat;
    }

    /* Merged from two `#vcomments p` rules: the margin came from the first,
       font-size and line-height from the second (which overrode 1.05rem / 1.78rem). */
    #vcomments p {
        margin: 2px 2px 10px;
        font-size: 1rem;
        line-height: 1.5rem;
    }

    #vcomments blockquote p {
        text-indent: 0.2rem;
    }

    #vcomments a {
        padding: 0 2px;
        color: #4cbf30;
        font-weight: 500;
        text-decoration: none;
    }

    #vcomments img {
        max-width: 100%;
        height: auto;
        cursor: pointer;
    }

    /* ---- Lists ---- */

    #vcomments ol li {
        list-style-type: decimal;
    }

    /* NOTE(review): `ul` here is page-wide. */
    #vcomments ol,
    ul {
        display: block;
        padding-left: 2em;
        word-spacing: 0.05rem;
    }

    /* NOTE(review): `ol li` here is page-wide. */
    #vcomments ul li,
    ol li {
        display: list-item;
        line-height: 1.8rem;
        font-size: 1rem;
    }

    #vcomments ul li {
        list-style-type: disc;
    }

    #vcomments ul ul li {
        list-style-type: circle;
    }

    /* ---- Tables ---- */

    /* A `border: 0` rule with this same selector used to follow and was itself
       overridden by a repeat of this rule at the end of the sheet; only the
       effective values remain.
       NOTE(review): `th` and `td` here are page-wide. */
    #vcomments table, th, td {
        padding: 12px 13px;
        border: 1px solid #dfe2e5;
    }

    /* NOTE(review): both branches are page-wide. */
    table tr:nth-child(2n), thead {
        background-color: #fafafa;
    }

    #vcomments table th {
        background-color: #f2f2f2;
        min-width: 80px;
    }

    #vcomments table td {
        min-width: 80px;
    }

    /* ---- Headings ---- */

    #vcomments h1 {
        font-size: 1.85rem;
        font-weight: bold;
        line-height: 2.2rem;
    }

    #vcomments h2 {
        font-size: 1.65rem;
        font-weight: bold;
        line-height: 1.9rem;
    }

    #vcomments h3 {
        font-size: 1.45rem;
        font-weight: bold;
        line-height: 1.7rem;
    }

    #vcomments h4 {
        font-size: 1.25rem;
        font-weight: bold;
        line-height: 1.5rem;
    }

    #vcomments h5 {
        font-size: 1.1rem;
        font-weight: bold;
        line-height: 1.4rem;
    }

    #vcomments h6 {
        font-size: 1rem;
        line-height: 1.3rem;
    }

    /* ---- Rules, quotes and code ---- */

    #vcomments hr {
        margin: 12px 0;
        border: 0;
        border-top: 1px solid #ccc;
    }

    #vcomments blockquote {
        margin: 15px 0;
        border-left: 5px solid #42b983;
        padding: 1rem 0.8rem 0.3rem 0.8rem;
        color: #666;
        background-color: rgba(66, 185, 131, .1);
    }

    #vcomments pre {
        font-family: monospace, monospace;
        padding: 1.2em;
        margin: .5em 0;
        background: #272822;
        overflow: auto;
        border-radius: 0.3em;
        tab-size: 4;
    }

    #vcomments code {
        font-family: monospace, monospace;
        padding: 1px 3px;
        font-size: 0.92rem;
        color: #e96900;
        background-color: #f8f8f8;
        border-radius: 2px;
    }

    #vcomments pre code {
        font-family: monospace, monospace;
        padding: 0;
        color: #e8eaf6;
        background-color: #272822;
    }

    #vcomments pre[class*="language-"] {
        padding: 1.2em;
        margin: .5em 0;
    }

    /* NOTE(review): the `pre[class*="language-"]` branch is page-wide. */
    #vcomments code[class*="language-"],
    pre[class*="language-"] {
        color: #e8eaf6;
    }

    /* ---- Form controls and inline text ---- */

    /* NOTE(review): the `[type="checkbox"]:checked` branch is page-wide. */
    #vcomments [type="checkbox"]:not(:checked), [type="checkbox"]:checked {
        position: inherit;
        margin-left: -1.3rem;
        margin-right: 0.4rem;
        margin-top: -1px;
        vertical-align: middle;
        left: unset;
        visibility: visible;
    }

    /* NOTE(review): `strong` here is page-wide. */
    #vcomments b,
    strong {
        font-weight: bold;
    }

    #vcomments dfn {
        font-style: italic;
    }

    #vcomments small {
        font-size: 85%;
    }

    #vcomments cite {
        font-style: normal;
    }

    #vcomments mark {
        background-color: #fcf8e3;
        padding: .2em;
    }
</style>

<div class="card valine-card" data-aos="fade-up">
    <div class="comment_headling" style="font-size: 20px; font-weight: 700; position: relative; padding-left: 20px; top: 15px; padding-bottom: 5px;">
        <i class="fas fa-comments fa-fw" aria-hidden="true"></i>
        <span>评论</span>
    </div>
    <div id="vcomments" class="card-content" style="display: grid">
    </div>
</div>

<script src="/libs/valine/av-min.js"></script>
<script src="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/valine/Valine.min.js"></script>
<script>
    // Mount the Valine comment widget on #vcomments.
    // The flags below were emitted by the page template as string comparisons
    // ('false' === 'true' and so on); they are written as the booleans they evaluate to.
    // NOTE(review): option meanings follow the Valine documentation — confirm
    // against the bundled Valine.min.js version.
    new Valine({
        el: '#vcomments',
        appId: 'RPCuj0HNm1eqAREO6c5T7nSJ-gzGzoHsz',
        appKey: 'laCdQbWLFWOWdkXVM3RxoXGe',
        notify: false,
        verify: false,
        visitor: true,
        avatar: 'mm',
        // Passed as a number rather than the string '10', so paging arithmetic
        // does not depend on implicit string-to-number coercion.
        pageSize: 10,
        lang: 'zh-cn',
        placeholder: '快来留言吧'
    });
</script>

    

    

    

<article id="prenext-posts" class="prev-next articles">
    <div class="row article-row">
        
        <div class="article col s12 m6" data-aos="fade-up">
            <div class="article-badge left-badge text-color">
                <i class="fas fa-chevron-left"></i>&nbsp;上一篇</div>
            <div class="card">
                <a href="/2020/06/30/scikit-learn-xi-lie-wu-jue-ce-shu/">
                    <div class="card-image">
                        
                        
                        <img src="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/medias/featureimages/16.jpg" class="responsive-img" alt="scikit-learn系列五：决策树">
                        
                        <span class="card-title">scikit-learn系列五：决策树</span>
                    </div>
                </a>
                <div class="card-content article-content">
                    <div class="summary block-with-text">
                        
                            实现决策树算法，原理解释+Titanic幸存者
                        
                    </div>
                    <div class="publish-info">
                        <span class="publish-date">
                            <i class="far fa-clock fa-fw icon-date"></i>2020-06-30
                        </span>
                        <span class="publish-author">
                            
                            <i class="fas fa-bookmark fa-fw icon-category"></i>
                            
                            <a href="/categories/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/" class="post-category">
                                    机器学习
                                </a>
                            
                            
                        </span>
                    </div>
                </div>
                
                <div class="card-action article-tags">
                    
                    <a href="/tags/%E5%9F%BA%E7%A1%80%E7%9F%A5%E8%AF%86/">
                        <span class="chip bg-color">基础知识</span>
                    </a>
                    
                    <a href="/tags/ML%E7%AE%97%E6%B3%95/">
                        <span class="chip bg-color">ML算法</span>
                    </a>
                    
                </div>
                
            </div>
        </div>
        
        
        <div class="article col s12 m6" data-aos="fade-up">
            <div class="article-badge right-badge text-color">
                下一篇&nbsp;<i class="fas fa-chevron-right"></i>
            </div>
            <div class="card">
                <a href="/2020/06/30/scikit-learn-xi-lie-san-xian-xing-hui-gui/">
                    <div class="card-image">
                        
                        
                        <img src="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/medias/featureimages/13.jpg" class="responsive-img" alt="scikit-learn系列三：线性回归">
                        
                        <span class="card-title">scikit-learn系列三：线性回归</span>
                    </div>
                </a>
                <div class="card-content article-content">
                    <div class="summary block-with-text">
                        
                            实现线性回归算法，原理解释+房价预测案例
                        
                    </div>
                    <div class="publish-info">
                            <span class="publish-date">
                                <i class="far fa-clock fa-fw icon-date"></i>2020-06-30
                            </span>
                        <span class="publish-author">
                            
                            <i class="fas fa-bookmark fa-fw icon-category"></i>
                            
                            <a href="/categories/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/" class="post-category">
                                    机器学习
                                </a>
                            
                            
                        </span>
                    </div>
                </div>
                
                <div class="card-action article-tags">
                    
                    <a href="/tags/%E5%9F%BA%E7%A1%80%E7%9F%A5%E8%AF%86/">
                        <span class="chip bg-color">基础知识</span>
                    </a>
                    
                    <a href="/tags/ML%E7%AE%97%E6%B3%95/">
                        <span class="chip bg-color">ML算法</span>
                    </a>
                    
                </div>
                
            </div>
        </div>
        
    </div>
</article>

</div>



<!-- 代码块功能依赖 -->
<script type="text/javascript" src="/libs/codeBlock/codeBlockFuction.js"></script>

<!-- 代码语言 -->

<script type="text/javascript" src="/libs/codeBlock/codeLang.js"></script>


<!-- 代码块复制 -->

<script type="text/javascript" src="/libs/codeBlock/codeCopy.js"></script>


<!-- 代码块收缩 -->

<script type="text/javascript" src="/libs/codeBlock/codeShrink.js"></script>


<!-- 代码块折行 -->

<style type="text/css">
code[class*="language-"], pre[class*="language-"] { white-space: pre !important; }
</style>


    </div>
    <div id="toc-aside" class="expanded col l3 hide-on-med-and-down">
        <div class="toc-widget">
            <div class="toc-title"><i class="far fa-list-alt"></i>&nbsp;&nbsp;目录</div>
            <div id="toc-content"></div>
        </div>
    </div>
</div>

<!-- TOC 悬浮按钮. -->

<div id="floating-toc-btn" class="hide-on-med-and-down">
    <a class="btn-floating btn-large bg-color">
        <i class="fas fa-list-ul"></i>
    </a>
</div>


<script src="https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/tocbot/tocbot.min.js"></script>
<script>
    $(function () {
        // Build the table of contents in #toc-content from the h2–h4 headings
        // found inside #articleContent.
        tocbot.init({
            tocSelector: '#toc-content',
            contentSelector: '#articleContent',
            headingsOffset: -($(window).height() * 0.4 - 45),
            collapseDepth: 0,
            headingSelector: 'h2, h3, h4'
        });

        // Replace the generated anchors with sequential ASCII ones
        // ("toc-heading-1", "toc-heading-2", ...) so links also work for
        // Chinese heading titles. The n-th TOC link must end up pointing at the
        // n-th heading, so both sides are numbered in document order.
        const tocHeading = 'toc-heading-';
        $('#toc-content a').each(function (index) {
            $(this).attr('href', '#' + tocHeading + (index + 1));
        });

        // Number ALL h2–h4 descendants, not only direct children. The TOC links
        // above are numbered over every entry, so renumbering only the direct
        // children would shift every later link onto the wrong heading as soon
        // as one heading is nested (for example inside a blockquote).
        $('#articleContent').find('h2, h3, h4').each(function (index) {
            $(this).attr('id', tocHeading + (index + 1));
        });

        // Pin the TOC widget once the page has scrolled past the threshold.
        let tocHeight = parseInt($(window).height() * 0.4 - 64);
        let $tocWidget = $('.toc-widget');
        $(window).scroll(function () {
            let scroll = $(window).scrollTop();
            if (scroll > tocHeight) {
                $tocWidget.addClass('toc-fixed');
            } else {
                $tocWidget.removeClass('toc-fixed');
            }
        });

        // Resize the element #targetId to the width of #srcId plus a fixed
        // allowance that depends on how wide the source is. Does nothing when
        // the source element does not exist.
        let fixPostCardWidth = function (srcId, targetId) {
            let srcDiv = $('#' + srcId);
            if (srcDiv.length === 0) {
                return;
            }

            let w = srcDiv.width();
            let extra;
            if (w >= 450) {
                extra = 21;
            } else if (w >= 350) {
                extra = 18;
            } else if (w >= 300) {
                extra = 16;
            } else {
                extra = 14;
            }
            $('#' + targetId).width(w + extra);
        };

        // Floating button: show or hide the TOC sidebar and give the main
        // column its 'l9' class only while the sidebar is visible.
        const expandedClass = 'expanded';
        let $tocAside = $('#toc-aside');
        let $mainContent = $('#main-content');
        $('#floating-toc-btn .btn-floating').click(function () {
            if ($tocAside.hasClass(expandedClass)) {
                $tocAside.removeClass(expandedClass).hide();
                $mainContent.removeClass('l9');
            } else {
                $tocAside.addClass(expandedClass).show();
                $mainContent.addClass('l9');
            }
            // The article column just changed width; re-fit the prev/next cards.
            fixPostCardWidth('artDetail', 'prenext-posts');
        });

    });
</script>

    

</main>


<!-- MathJax 2.7.5 loaded from jsDelivr: it is the CDN the MathJax v2 documentation points to and the one this page already uses for its other libraries. -->
<script src="https://cdn.jsdelivr.net/npm/mathjax@2.7.5/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script>
    // Inline-math delimiters: $...$ and \(...\).
    // The backslashes must be doubled inside a JavaScript string literal:
    // '\(' evaluates to a bare '(', which would turn every ordinary
    // parenthesis on the page into an inline-math delimiter.
    MathJax.Hub.Config({
        tex2jax: {inlineMath: [['$', '$'], ['\\(', '\\)']]}
    });
</script>



<footer class="page-footer bg-color">
    
    <div class="container row center-align" style="margin-bottom: 15px !important;">
        <div class="col s12 m8 l8 copy-right">
            Copyright&nbsp;&copy;
            <span id="year">2020</span>
            <a href="/about" target="_blank">DongZhou</a>
            |&nbsp;Powered by&nbsp;<a href="https://hexo.io/" target="_blank">Hexo</a>
            |&nbsp;Theme&nbsp;<a href="https://github.com/blinkfox/hexo-theme-matery" target="_blank">Matery</a>
            <br>
            
            &nbsp;<i class="fas fa-chart-area"></i>&nbsp;站点总字数:&nbsp;<span
                class="white-color">60.5k</span>&nbsp;字
            
            
            
            
            
            
            <span id="busuanzi_container_site_pv">
                |&nbsp;<i class="far fa-eye"></i>&nbsp;总访问量:&nbsp;<span id="busuanzi_value_site_pv"
                    class="white-color"></span>&nbsp;次
            </span>
            
            
            <span id="busuanzi_container_site_uv">
                |&nbsp;<i class="fas fa-users"></i>&nbsp;总访问人数:&nbsp;<span id="busuanzi_value_site_uv"
                    class="white-color"></span>&nbsp;人
            </span>
            
            <br>
            
            <span id="sitetime">载入运行时间...</span>
            <script>
                /**
                 * Refresh the footer's copyright year (#year) and uptime text
                 * (#sitetime) from the time elapsed since the site's start date.
                 *
                 * Reads the current time from `new Date()` and writes the two
                 * elements' innerHTML; returns nothing.
                 */
                function siteTime() {
                    // Durations in milliseconds. A "year" is a flat 365 days.
                    const SECOND = 1000;
                    const MINUTE = SECOND * 60;
                    const HOUR = MINUTE * 60;
                    const DAY = HOUR * 24;
                    const YEAR = DAY * 365;

                    // Site start date: 2020-06-27 00:00:00 (month written 1-based).
                    const startYear = 2020;
                    const startMonth = 6;
                    const startDate = 27;
                    const startHour = 0;
                    const startMinute = 0;
                    const startSecond = 0;

                    const today = new Date();
                    const todayYear = today.getFullYear();

                    // Date.UTC takes a 0-based month. The start month is therefore
                    // shifted down by one and getMonth() is used as-is. Passing
                    // 1-based months for both dates (as before) moved each date one
                    // month forward and miscounted days whenever the two shifted
                    // months had different lengths.
                    // Both timestamps are built from local wall-clock fields, so the
                    // difference is a plain calendar difference.
                    const t1 = Date.UTC(startYear, startMonth - 1, startDate, startHour, startMinute, startSecond);
                    const t2 = Date.UTC(todayYear, today.getMonth(), today.getDate(),
                        today.getHours(), today.getMinutes(), today.getSeconds());

                    // Peel the elapsed time apart from the largest unit downwards.
                    let remaining = t2 - t1;
                    const diffYears = Math.floor(remaining / YEAR);
                    remaining -= diffYears * YEAR;
                    const diffDays = Math.floor(remaining / DAY);
                    remaining -= diffDays * DAY;
                    const diffHours = Math.floor(remaining / HOUR);
                    remaining -= diffHours * HOUR;
                    const diffMinutes = Math.floor(remaining / MINUTE);
                    remaining -= diffMinutes * MINUTE;
                    const diffSeconds = Math.floor(remaining / SECOND);

                    const yearEl = document.getElementById("year");
                    const uptimeEl = document.getElementById("sitetime");
                    if (startYear === todayYear) {
                        // Still in the start year: single year, no "年" part.
                        yearEl.innerHTML = todayYear;
                        uptimeEl.innerHTML = "本站已安全运行 " + diffDays + " 天 " + diffHours +
                            " 小时 " + diffMinutes + " 分钟 " + diffSeconds + " 秒";
                    } else {
                        yearEl.innerHTML = startYear + " - " + todayYear;
                        uptimeEl.innerHTML = "本站已安全运行 " + diffYears + " 年 " + diffDays +
                            " 天 " + diffHours + " 小时 " + diffMinutes + " 分钟 " + diffSeconds + " 秒";
                    }
                }
                setInterval(siteTime, 1000);
            </script>
            
            <br>
            
        </div>
        <div class="col s12 m4 l4 social-link social-statis">
    <a href="https://github.com/DongZhouGu" class="tooltipped" target="_blank" data-tooltip="访问我的GitHub" data-position="top" data-delay="50">
        <i class="fab fa-github"></i>
    </a>



    <a href="mailto:gdz678@163.com" class="tooltipped" target="_blank" data-tooltip="邮件联系我" data-position="top" data-delay="50">
        <i class="fas fa-envelope-open"></i>
    </a>







    <a href="tencent://AddContact/?fromId=50&fromSubId=1&subcmd=all&uin=1596586942" class="tooltipped" target="_blank" data-tooltip="QQ联系我: 1596586942" data-position="top" data-delay="50">
        <i class="fab fa-qq"></i>
    </a>







    <a href="/atom.xml" class="tooltipped" target="_blank" data-tooltip="RSS 订阅" data-position="top" data-delay="50">
        <i class="fas fa-rss"></i>
    </a>

</div>
    </div>
</footer>

<div class="progress-bar"></div>


<!-- 搜索遮罩框 -->
<div id="searchModal" class="modal">
    <div class="modal-content">
        <div class="search-header">
            <span class="title"><i class="fas fa-search" aria-hidden="true"></i>&nbsp;&nbsp;搜索</span>
            <!-- A placeholder is not an accessible name, so the field is labelled explicitly. -->
            <input type="search" id="searchInput" name="s" placeholder="请输入搜索的关键字"
                   aria-label="搜索" class="search-input">
        </div>
        <div id="searchResult"></div>
    </div>
</div>

<script src="/js/search.js"></script>
<script type="text/javascript">
// Once the DOM is ready, start the site search: searchFunc receives the index
// URL plus the ids of the search input and of the result container.
// (searchFunc is presumably provided by /js/search.js, loaded just above.)
$(document).ready(function () {
    var indexUrl = "/search.xml";
    searchFunc(indexUrl, 'searchInput', 'searchResult');
});
</script>

<!-- 回到顶部按钮 -->
<div id="backTop" class="top-scroll">
    <a class="btn-floating btn-large waves-effect waves-light" href="#!">
        <i class="fas fa-arrow-up"></i>
    </a>
</div>


<script src=" https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/materialize/materialize.min.js"></script>
<script src=" https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/masonry/masonry.pkgd.min.js"></script>
<script src=" https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/aos/aos.js"></script>
<script src=" https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/scrollprogress/scrollProgress.min.js"></script>
<script src=" https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/lightGallery/js/lightgallery-all.min.js"></script>
<script src=" /js/matery.js"></script>

<!-- Baidu Analytics -->

<!-- Baidu Push -->

<script>
    // Baidu push: inject Baidu's push.js in front of the first <script> element
    // of the document, choosing the script URL by the page's own protocol.
    (function () {
        var pushScript = document.createElement('script');
        var servedOverHttps = window.location.protocol.split(':')[0] === 'https';

        pushScript.src = servedOverHttps
            ? 'https://zz.bdstatic.com/linksubmit/push.js'
            : 'http://push.zhanzhang.baidu.com/push.js';

        var firstScript = document.getElementsByTagName("script")[0];
        firstScript.parentNode.insertBefore(pushScript, firstScript);
    })();
</script>


<script src=" https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/others/clicklove.js" async="async"></script>


<script async src=" https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/others/busuanzi.pure.mini.js"></script>













<script src=" https://cdn.jsdelivr.net/gh/DongZhouGu/DongZhouGu.github.io/libs/instantpage/instantpage.js" type="module"></script>


<script>
            // Settings object shared with the lazy-load script that follows on this page.
            window.imageLazyLoadSetting = {
                // Read once by the loader; when true it re-queries
                // img[data-original] on every pass instead of reusing its initial list.
                isSPA: false,
                // Placeholder: the loader overwrites this with its own
                // image-processing function when it runs.
                processImages: null,
            };
        </script><script>window.addEventListener("load",function(){var t=/\.(gif|jpg|jpeg|tiff|png)$/i,r=/^data:image\/[a-z]+;base64,/;Array.prototype.slice.call(document.querySelectorAll("img[data-original]")).forEach(function(a){var e=a.parentNode;"A"===e.tagName&&(e.href.match(t)||e.href.match(r))&&(e.href=a.dataset.original)})});</script><script>!function(n){n.imageLazyLoadSetting.processImages=o;var i=n.imageLazyLoadSetting.isSPA,r=Array.prototype.slice.call(document.querySelectorAll("img[data-original]"));function o(){i&&(r=Array.prototype.slice.call(document.querySelectorAll("img[data-original]")));for(var t,e,a=0;a<r.length;a++)t=r[a],e=void 0,0<=(e=t.getBoundingClientRect()).bottom&&0<=e.left&&e.top<=(n.innerHeight||document.documentElement.clientHeight)&&function(){var t,e,n,i,o=r[a];t=o,e=function(){r=r.filter(function(t){return o!==t})},n=new Image,i=t.getAttribute("data-original"),n.onload=function(){t.src=i,e&&e()},n.src=i}()}o(),n.addEventListener("scroll",function(){var t,e;t=o,e=n,clearTimeout(t.tId),t.tId=setTimeout(function(){t.call(e)},500)})}(this);</script></body>

</html>
